# change to your local directory
setwd("/Users/John/Desktop/dropbox/Switchgrass_PlantPhys/run_full_analysis")

#clear the environment
rm(list=ls())

#install the package with R functions
library(devtools)
install_github("jtlovell/limmaDE2")
## Skipping install for github remote, the SHA1 (934bcb5b) has not changed since last install.
##   Use `force = TRUE` to force installation
library(limmaDE2)
## Loading required package: limma
## Loading required package: SimSeq
library(ggplot2)
# Note 1. to load this package, you need to have the following packages installed:
# limma, SimSeq
# Install by:
# source("https://bioconductor.org/biocLite.R")
# biocLite() # only do this if you have not installed the base bioconductor packages -
#     ### can take a while
# biocLite(c("limma","SimSeq"))

# Note 2. To use some of the functions in this package you need a few other packages installed:
# ggplot2, edgeR, venneuler, qvalue, qdap, reshape2, dendroextras, Heatplus, gdata
# install by:
# biocLite(c("Heatplus", "qvalue","edgeR"))
# install.packages("ggplot2", "venneuler", "qdap", "reshape2", "dendroextras", "gdata")

#install the package with data
install_github("jtlovell/PhyGenomicsData.PV2016")
## Skipping install for github remote, the SHA1 (7f8afa38) has not changed since last install.
##   Use `force = TRUE` to force installation
library(PhyGenomicsData.PV2016)

# Run just the 2012 data analysis:
### for conviencence, rename info and counts
info<-info12
counts<-counts12

#################################
# Part 1: Full model with Treatment
#################################
## Run LIMMA, with subplot treated as repeated measures of the same plant.
### The only response is treatment. Sub_Block is the sub-block in the split plot design, containing two AP13
###    plants. The duplicate correlation and blocking in LIMMA should account for correlations between these plants
### We do not estimate intercept, so that the model only looks at the effects of Treatment, and the F-test reflects this
## Run the modeling pipeline
stats<-pipeLIMMA(counts=counts, info=info, block=info$Sub_Block, formula="~ Treatment")
## calculating normalization factors ... 
## running voom normalization correcting for quality weights ...

## calculating duplicate correlation among replicates ... 
## fitting linear model ... 
## processing statistics and calculating q-values ...
v<-stats$voom[["E"]]
stats.model1<-stats$stats

#################################
# Part 2: All pairwise contrasts
#################################
## Generate a design matrix that specifies all relavant contrasts
f<-info$Treatment
design <- model.matrix(~0+f)
contrast.matrix<-makeContrasts(
  f25th-flow, fmean-flow, fambient-flow, f75th-flow, fhigh-flow, fmean-f25th, fambient-f25th, fhigh-f25th,
  fambient-fmean, f75th-fmean, fhigh-fmean, f75th-fambient, fhigh-fambient, fhigh-f75th, levels=design)
stats<-pipeLIMMA(counts=counts, info=info, block=NULL, design=design, contrast.matrix=contrast.matrix)
## calculating normalization factors ... 
## running voom normalization correcting for quality weights ...

## fitting model to contrast matrix ... 
## processing statistics and calculating q-values ...
stats.model1.contrasts<-stats$stats

## Look at some of the effects, for example, side-by-side volcano plots
par.original <- par(no.readonly=TRUE)
par(mfrow=c(2,2))
with(stats.model1.contrasts,
     volcanoPlot(pval=ebayesPvalue_fhigh...flow, lfc=fhigh...flow_logFC,
                 sig=ebayesQvalue_fhigh...flow, main="high vs. low",
                 ylim=c(0,7), xlim=c(-6.1,6.1), legpos=c(-.6,0)))
##       sig
## updown FALSE TRUE
##   down  9300  884
##   up    8325  652
with(stats.model1.contrasts,
     volcanoPlot(pval=ebayesPvalue_f75th...flow,  lfc=f75th...flow_logFC,
                 sig=ebayesQvalue_f75th...flow, main="75th vs. low",
                 ylim=c(0,7), xlim=c(-6.1,6.1), legpos=c(-.6,0)))
##       sig
## updown FALSE  TRUE
##   down 10223    62
##   up    8813    63
with(stats.model1.contrasts,
     volcanoPlot(pval=ebayesPvalue_fmean...flow, lfc=fmean...flow_logFC,
                 sig=ebayesQvalue_fmean...flow, main="mean vs. low",
                 ylim=c(0,7), xlim=c(-6.1,6.1), legpos=c(-.6,0)))
##       sig
## updown FALSE TRUE
##   down  9854    1
##   up    9305    1
with(stats.model1.contrasts,
     volcanoPlot(pval=ebayesPvalue_f25th...flow, lfc=f25th...flow_logFC,
                 sig=ebayesQvalue_f25th...flow, main="25th vs. low",
                 ylim=c(0,7), xlim=c(-6.1,6.1), legpos=c(-.6,0)))

##       sig
## updown FALSE
##   down 10123
##   up    9038
par(par.original)

## Also, plot the effects across treatments for the top 20 genes
tp.trt<-topGeneRxn(v=v, info=info,
                     sig=stats.model1.contrasts$Fqvalue,
                     pointcols=NULL, paletteChoice="RdBu",
                     xdat="Treatment")

tp.mdwp<-topGeneRxn(v=v, info=info,
                     sig=stats.model1.contrasts$Fqvalue,
                     pointcols=c("darkred","forestgreen","skyblue"), paletteChoice=NULL,
                     xdat="MDWP", coldat="Treatment", xlab="Mid-day Leaf Water Potential (MPa)")

#################################
# Part 3: Run PCA
#################################
pca<-voom2PCA(v=v, info=info, ids=info$ID)

ggplot(pca, aes(x=PC1, y=PC2, col=Treatment, alpha=Treatment))+
  theme_bw()+geom_point(size=4)

#################################
# Part 4: Subset to lines with physiology data
#################################
phys.lines<-info$ID[!is.na(info$order)]
counts<-counts[,phys.lines]
info<-info[info$ID %in% phys.lines,]
info$Treatment<-factor(info$Treatment, levels=c("low","mean","high"))

#################################
# Part 5: Run model with MDWP as the predictor
#################################
stats<-pipeLIMMA(counts=counts, info=info, block=info$Sub_Block, formula="~ MDWP")
## calculating normalization factors ... 
## running voom normalization correcting for quality weights ...

## calculating duplicate correlation among replicates ... 
## fitting linear model ... 
## processing statistics and calculating q-values ...
v<-stats$voom[["E"]]
stats.allests.mdwp<-stats$stats
par(mfrow=c(1,1))
with(stats.allests.mdwp,
     volcanoPlot(pval=ebayesPvalue_MDWP, lfc=MDWP_logFC,
                 sig=ebayesQvalue_MDWP, main="high vs. low"))

##       sig
## updown FALSE TRUE
##   down  9439  793
##   up    8184  745
# plot the top genes for mdwp
tp.mdwp<-topGeneRxn(v=v, info=info,
                    sig=stats.allests.mdwp$ebayesQvalue_MDWP,
                    pointcols=c("darkred","forestgreen","skyblue"), paletteChoice=NULL,
                    xdat="MDWP", coldat="Treatment", xlab="Mid-day Leaf Water Potential (MPa)")

# pairwise plot of treatment (hvl and mdwp)
volcanoPair(lfc1=stats.allests.mdwp$MDWP_logFC,
            lfc2=stats.model1.contrasts$fhigh...flow_logFC,
            sig1=stats.allests.mdwp$ebayesQvalue_MDWP,
            sig2=stats.model1.contrasts$ebayesQvalue_fhigh...flow,
            xlab="mdwp Log2 Fold Change", ylab="treatment high vs. low log2 fold change")

## 
##  both  sig2    NS  sig1 
##   978   558 17065   560
# pull out strongest genes that have mdwp effect but not treatment
wp.genes<-stats.allests.mdwp$gene[stats.allests.mdwp$ebayesQvalue_MDWP<=0.01 & stats.model1.contrasts$ebayesQvalue_fhigh...flow>=0.05]
tp.mdwp<-topGeneRxn(v=v, info=info,
                    geneIDs=wp.genes,
                    sig=stats.allests.mdwp$ebayesQvalue_MDWP,
                    pointcols=c("darkred","forestgreen","skyblue"), paletteChoice=NULL,
                    xdat="MDWP", coldat="Treatment", xlab="Mid-day Leaf Water Potential (MPa)",
                    title="effect of genes with strong MDWP effects, \n but weak treatment effects")

# make venn diagram of the number of gene effected by MDWP and treatment
mdwp.trt.Qvals<-data.frame(MDWP.qvalue=stats.allests.mdwp$ebayesQvalue_MDWP, trt.qvalue=stats.model1.contrasts$ebayesQvalue_fhigh...flow)
sig.q.05<-makeBinarySig(mdwp.trt.Qvals, what="qvalue", alpha=0.05)
## MDWP.qvalue  1538    
## trt.qvalue   1536    
counts2Venn(x=sig.q.05, cols=c(1,2), names=c("MDWP","high vs. low"), colors=c("darkblue","red"),
            main="Comparison of models", type="limma", legy=-3.2, legx=-3.5)

#################################
# Part 6: Run model with Sampling Order as the predictor
#################################
stats<-pipeLIMMA(counts=counts, info=info, block=info$Sub_Block, formula="~ order")
## calculating normalization factors ... 
## running voom normalization correcting for quality weights ...

## calculating duplicate correlation among replicates ... 
## fitting linear model ... 
## processing statistics and calculating q-values ...
stats.allests.order<-stats$stats
with(stats.allests.order,
     volcanoPlot(pval=ebayesPvalue_order, lfc=order_logFC,
                 sig=ebayesQvalue_order, main="high vs. low"))

##       sig
## updown FALSE TRUE
##   down  9277   30
##   up    9816   38
tp.mdwp<-topGeneRxn(v=v, info=info,
                    sig=stats.allests.order$ebayesQvalue_order,
                    pointcols=c("darkred","forestgreen","skyblue"), paletteChoice=NULL,
                    xdat="order", coldat="Treatment", xlab="Order of sample collection \n 1st @ 11:00, last @ 13:00")

# save(stats.fullmodel, stats.allests, lim.contrasts, pca, v,
#      stats.fullmodel.mdwp, stats.allests.mdwp, stats.fullmodel.order, stats.allests.order,stats.fullmodel.subtrt, stats.allests.subtrt,
#      file="/Users/John/Desktop/dropbox/Switchgrass_PlantPhys/stats_output/tempe2012_allstats.RData")